
* ---------------------------------------------------------------------------
* Set-up: project root, configuration, log file and optional run arguments.
* Positional argument 1 is the project root directory; arguments 2 to 5 are
* optional overrides for weight_category, weight_versions, weight_window and
* wtype. The placeholder ___EMPTY___ stands for an argument that was not given.
* ---------------------------------------------------------------------------
global root_dir "`1'"

include "${root_dir}/code/config/config.do"

capture noisily log using ${log_dir}/make_inventorcountry.log, replace name(dat)

* Copy arguments 2 to 5 into the globals arg1 to arg4, blanking the placeholder.
forvalues i = 1/4 {
    local pos = `i' + 1
    global arg`i' "``pos''"
    if "${arg`i'}" == "___EMPTY___" {
        global arg`i' ""
    }
}

* Only non-empty arguments override what the configuration file has set.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* The weight type in effect is always echoed, whether overridden or not.
display "${wtype}"



* Everything from here to the matching closing brace near the end of the file
* runs under capture noisily: an error stops the block, and the return code is
* inspected after the closing brace to print a success or failure message.
capture noi {

******************************
* inventor weights pre sample*
* V1                       *
******************************
* Purpose of this section: for every BvDIDnumber pick the country with the
* largest 1995 inventor share (main_inventor_country), keep that largest share
* (share_main_invtctry_1995), and save one row per BvDIDnumber.

* Runs the country list configuration quietly.
* NOTE(review): presumably this defines the global countrylist1995 that later
* sections loop over - confirm in country_list.do.
qui do ${code_dir}/config/country_list.do

* Wide inventor weights, one share_invt_1995_ column per country suffix.
use ${dataset_dir}/weights/bvdid_inventor_weights_all${weight_window}_1995_orbis2017.dta, clear 

*merge in patent weights as stopgap for ties
* NOTE(review): mmerge is not an official Stata command (presumably the
* user-written SSC command); its merge indicator is dropped further down as _m.
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta

* One row per BvDIDnumber and country suffix; the suffix is stored as a string
* in _j because of the string option.
reshape long share_invt_1995_ share2_all_1995_, i(BvDIDnumber) string

* Countries without an inventor share cannot be the main inventor country.
drop if share_invt_1995_ == .

* First two characters of the firm identifier (BvD abbreviates BvDIDnumber).
gen HQcountry = substr(BvD,1,2)

*Select largest inventor contribution for each BvDIDnumber and set homecountry
* Within firm, rows are ordered by inventor share, then patent share, then
* country code, so the last row carries the largest inventor share. m is set on
* that last row only and is a string because _j is a string.
by BvDIDnumber (share_invt_1995_ share2_all_1995_ _j), sort : gen m = _j if _n == _N


* Largest inventor share of the firm: taken from the row where m was set, then
* copied to every row of the firm.
by BvD: gen highest_inventor_share = share_invt_1995_ if m == _j
by BvD: replace highest_inventor_share = highest_inventor_share[_N] if highest_inventor_share == .
* temp_tie marks every row whose inventor share equals the firm maximum;
* temptiesum is the running count and temptiemax the number of tied rows.
by BvD: gen temp_tie = 1 if share_invt_1995_ == highest_inventor_share
by BvD: gen temptiesum = sum(temp_tie)
by BvD: gen temptiemax = temptiesum[_N]
* temptie_plus marks tied rows when there is more than one of them and the
* patent share on the last row of the firm is missing or zero. Here
* share2_all_1995 is an abbreviation of share2_all_1995_.
by BvD: gen temptie_plus = 1 if temptiemax > 1 & temp_tie == 1 & (share2_all_1995[_N] == . | share2_all_1995[_N] == 0)
* Running count (in sort order) of flagged tied rows whose country code equals
* the HQ country; it is positive from the HQ row onwards.
by BvD: gen has_HQcountry = sum(_j == HQcountry & temptie_plus == 1)
* Flagged rows at or after the HQ row take the HQ country as m.
by BvD: replace m = HQcountry if has_HQcountry > 0 & temptie_plus == 1
* Flagged rows with no HQ match so far: random marks those that already had m
* set, and m is blanked on all of them.
by BvD: gen random = 1 if has_HQcountry == 0 & temptie_plus == 1 & m != ""
by BvD: replace m = "" if has_HQcountry == 0 & temptie_plus == 1
* rasum and ranmax are computed here but dropped below without being used.
by BvD: gen rasum = sum(random)
by BvD: gen ranmax = rasum[_N]

* Any firm whose last row is still blank falls back to the country code on
* that last row under the same ordering as above.
by BvDIDnumber (share_invt_1995_ share2_all_1995_ _j), sort : replace m = _j if _n == _N & m == ""


*distribute to rest of observations
by BvDIDnumber, sort : replace m = m[_N] if m == ""
* Helper variables are removed before reshaping back; m and
* highest_inventor_share are not dropped and stay in the data.
drop _m HQcountry temptie* temp_tie has_HQcountry random rasum ranmax
* No arguments: reshape reuses the settings stored by the reshape long above.
reshape wide

rename m main_inventor_country 

label variable main_inventor_country "Country with most inventor contributions for BvDIDnumber in Period"

* Give every per-country inventor share column the same label.
ds share_invt_1995_*
local r: di r(varlist)
foreach var of local r { 
    label var `var' "Share of inventor contributions in period"
}

* The patent shares were only needed for the tie handling above.
drop share2_all_1995_*



*take out maximum share of inventor contributions
egen share_main_invtctry_1995 = rowmax(share_invt_1995_*)

* just to check if the weights sum to 1 (should)
* NOTE(review): total_weight is never inspected or asserted, and the keep on
* the next statement drops it again.
egen total_weight = rowtotal(share_invt_1995_*)

keep BvDIDnumber main_inventor_country share_main_invtctry_1995

save ${dataset_dir}/weights/bvdid_main_inventor_all${weight_window}_1995_orbis2017_restricted.dta, replace

***********************************************************************
* HQ country part 1: weighted lngdpgap regressors per firm and year   *
***********************************************************************
* Output: one row per firm-year with the weighted total over countries,
* the HQ and non-HQ parts (shr), their normalised versions (shr2), the
* HQ-country weight (saved as maxweight_1995) and shr2_all_1995.

* Firm-year skeleton: identifiers only.
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Year-level wide series from loggdp_gap_wide_TOTAL; unmatched years are dropped.
mmerge year using ${dataset_dir}/indep_vars/loggdp_gap_wide_TOTAL.dta, unmatched(none)

* HQ country code: first two characters of the firm identifier.
generate hqctry = substr(BvD, 1, 2)

* Firm-level patent weights; firms without weights are kept and flagged.
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
generate missing_weights_1995 = (_m == 1)
drop _m

* Attach the GDP shares used as fallback weights to every row.
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta

sort BvD year

foreach gapvar in lngdpgap {

    foreach cc of global countrylist1995 {
        * A country without a series gets an all-missing placeholder.
        noisily capture confirm variable `gapvar'_`cc'
        if _rc {
            generate `gapvar'_`cc' = .
        }

        * Patent weight, replaced by the GDP share where it is missing.
        generate weight_`cc' = cond(share2_all_1995_`cc' == ., share_GDP_`cc', share2_all_1995_`cc')
        generate `gapvar'_wtd_`cc' = weight_`cc' * `gapvar'_`cc'
        generate ln`gapvar'_wtd_`cc' = weight_`cc' * ln(`gapvar'_`cc')
    }

    * Row totals over all countries; missing when every component is missing.
    egen shr2_all_1995 = rowtotal(share2_all_1995_??), missing
    egen `gapvar'_ALL_1995_wtd = rowtotal(`gapvar'_wtd_??), missing
    egen ln`gapvar'_ALL_1995_wtd = rowtotal(ln`gapvar'_wtd_??), missing

    * HQ part: the weighted term and the weight of the HQ country itself.
    generate `gapvar'_shr_hq_1995_wtd = .
    generate hqctry_weight = .
    foreach cc of global countrylist1995 {
        replace `gapvar'_shr_hq_1995_wtd = `gapvar'_wtd_`cc' if hqctry == "`cc'"
        replace hqctry_weight = weight_`cc' if hqctry == "`cc'"
    }

    * Non-HQ part: total minus HQ term; the total itself when that difference
    * is missing; missing when the HQ country carries all the weight.
    generate `gapvar'_shr_nonhq_1995_wtd = `gapvar'_ALL_1995_wtd - `gapvar'_shr_hq_1995_wtd
    replace `gapvar'_shr_nonhq_1995_wtd = `gapvar'_ALL_1995_wtd if `gapvar'_shr_nonhq_1995_wtd == .
    replace `gapvar'_shr_nonhq_1995_wtd = . if hqctry_weight == 1

    * Version 2: divide each part by its weight, with shr2_all_1995 as the
    * fallback denominator for the non-HQ part.
    generate `gapvar'_shr2_hq_1995_wtd = `gapvar'_shr_hq_1995_wtd / hqctry_weight
    generate `gapvar'_shr2_nonhq_1995_wtd = `gapvar'_shr_nonhq_1995_wtd / (1 - hqctry_weight)
    replace `gapvar'_shr2_nonhq_1995_wtd = `gapvar'_shr_nonhq_1995_wtd / shr2_all_1995 if `gapvar'_shr2_nonhq_1995_wtd == .

    * Per-country intermediates are no longer needed.
    drop *`gapvar'_wtd_* weight_??
}

* Keep the HQ weight under its final name, then remove the inputs.
rename hqctry_weight maxweight_1995
drop share* lnlngdpgap*
drop lngdpgap_?? hqctry

compress
save ${dataset_dir}/indep_vars/bvd_year_devgdp_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, replace

clear


global sectors MANUF TOTAL


*****************************************************************
* Wages and value added per employee, built once per sector
* listed in the global sectors (MANUF and TOTAL)
*****************************************************************
* For every sector this saves one firm-year file holding, for each of
* hswMP, lswMP and vaempMP, the weighted total over countries, the HQ and
* non-HQ parts (shr) and their normalised versions (shr2), in levels and
* in the ln-prefixed form.

* Lookup tables keyed by sector name: a* is the one-letter suffix used in
* variable names, f* is the tag used in the output file name.
local aMANUF "m"
local aTOTAL "t"
local fMANUF "manuf"
local fTOTAL "totind"
foreach sector in $sectors {
    local m `a`sector''

    * Firm-year skeleton: identifiers only.
    use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
    keep BvD year

    * Add wages
    mmerge year using ${dataset_dir}/indep_vars/lswages_wide_`sector'.dta, unmatched(none)
    mmerge year using ${dataset_dir}/indep_vars/hswages_wide_`sector'.dta, unmatched(none)

    * note: no wage data in 2010 (kept for lagging dep var)
    * Add VA per employee
    mmerge year using ${dataset_dir}/indep_vars/vaemployee_wide_`sector'.dta, unmatched(none)

    * HQ country code: first two characters of the firm identifier.
    gen hqctry = substr(BvD,1,2)

    * Firm-level patent weights; firms without weights are kept and flagged.
    * NOTE(review): an earlier comment described these as weights based on a
    * 10-year patent portfolio - confirm against the script that builds them.
    * The file names carry an explicit .dta extension, as in the other
    * sections, so they do not depend on the extension being appended.
    mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
    gen missing_weights_1995 = (_m==1)
    drop _m

    * Attach the GDP shares used as fallback weights to every row.
    cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta

    sort BvD year
*   foreach vv in lswLP mswLP hswLP vaempLP minwLP {
    foreach vv in hswMP lswMP vaempMP {
        foreach ctry of global countrylist1995 {
            * A country without a series gets an all-missing placeholder.
            noisily capture confirm variable `vv'_`ctry'
            if _rc != 0 {
                gen `vv'_`ctry' = .
            }

            * Patent weight, replaced by the GDP share where it is missing.
            gen weight_`ctry' = share2_all_1995_`ctry'
            replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .

            gen `vv'_wtd_`ctry' = weight_`ctry' * `vv'_`ctry'
            gen ln`vv'_wtd_`ctry' = weight_`ctry' * ln(`vv'_`ctry')
        }

        * The row total of the patent shares is built once per sector; on
        * later passes of this loop the variable already exists.
        noisily capture confirm variable share2_all_1995
        if _rc != 0 {
            egen share2_all_1995 = rowtotal(share2_all_1995_??), missing
        }

        * Weighted totals over all countries, tagged with the sector suffix.
        egen `vv'`m'_ALL_1995_wtd = rowtotal(`vv'_wtd_??), missing
        egen ln`vv'`m'_ALL_1995_wtd = rowtotal(ln`vv'_wtd_??), missing

        * Based on hq country
        gen ln`vv'`m'_shr_hq_1995_wtd = .
        gen `vv'`m'_shr_hq_1995_wtd = .
        gen hqctry_weight = .

        foreach ctry of global countrylist1995 {
            replace ln`vv'`m'_shr_hq_1995_wtd = ln`vv'_wtd_`ctry' if hqctry == "`ctry'"
            replace `vv'`m'_shr_hq_1995_wtd = `vv'_wtd_`ctry' if hqctry == "`ctry'"
            replace hqctry_weight = weight_`ctry' if hqctry == "`ctry'"
        }

        * Non-HQ part: total minus HQ term; the total itself when that
        * difference is missing; missing when the HQ weight equals one.
        gen ln`vv'`m'_shr_nonhq_1995_wtd = ln`vv'`m'_ALL_1995_wtd - ln`vv'`m'_shr_hq_1995_wtd
        gen `vv'`m'_shr_nonhq_1995_wtd = `vv'`m'_ALL_1995_wtd - `vv'`m'_shr_hq_1995_wtd
        replace ln`vv'`m'_shr_nonhq_1995_wtd = ln`vv'`m'_ALL_1995_wtd if ln`vv'`m'_shr_nonhq_1995_wtd == .
        replace `vv'`m'_shr_nonhq_1995_wtd = `vv'`m'_ALL_1995_wtd if `vv'`m'_shr_nonhq_1995_wtd == .
        replace ln`vv'`m'_shr_nonhq_1995_wtd = . if hqctry_weight == 1
        replace `vv'`m'_shr_nonhq_1995_wtd = . if hqctry_weight == 1

        * Version 2: take home country wage and average foreign country wage (i.e. normalize)
        gen ln`vv'`m'_shr2_hq_1995_wtd = ln`vv'`m'_shr_hq_1995_wtd / hqctry_weight
        gen ln`vv'`m'_shr2_nonhq_1995_wtd = ln`vv'`m'_shr_nonhq_1995_wtd / (1-hqctry_weight)
        gen `vv'`m'_shr2_hq_1995_wtd = `vv'`m'_shr_hq_1995_wtd / hqctry_weight
        gen `vv'`m'_shr2_nonhq_1995_wtd = `vv'`m'_shr_nonhq_1995_wtd / (1-hqctry_weight)
        * Fallback denominator when the HQ weight is not available.
        replace `vv'`m'_shr2_nonhq_1995_wtd = `vv'`m'_shr_nonhq_1995_wtd / share2_all_1995 if `vv'`m'_shr2_nonhq_1995_wtd == .
        replace ln`vv'`m'_shr2_nonhq_1995_wtd = ln`vv'`m'_shr_nonhq_1995_wtd / share2_all_1995 if ln`vv'`m'_shr2_nonhq_1995_wtd == .

        * Reset the per-variable intermediates before the next pass.
        drop hqctry_weight
        drop *`vv'_wtd_* weight_??
    }

    * Remove the weight inputs and the raw per-country series.
    drop share*
    drop lsw??_?? hsw??_?? vaemp??_??
    compress
    save ${dataset_dir}/indep_vars/bvd_year_wages_vaemp_`f`sector''_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, replace
}
clear

********************************
* GDP per capita
********************************
* For every sector in the global sectors this saves one firm-year file
* holding, for each gdppc series, the weighted total over countries (in
* levels and in the ln-prefixed form) plus the HQ and non-HQ parts (shr)
* and their normalised versions (shr2).

* Sector name to variable-name suffix (a*) and to file-name tag (f*).
local aMANUF "m"
local fMANUF "manuf"
local aTOTAL "t"
local fTOTAL "totind"

foreach sector in $sectors {
    local m `a`sector''

    * Firm-year skeleton: identifiers only.
    use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
    keep BvD year

    * Year-level wide GDP per capita series for this sector.
    mmerge year using ${dataset_dir}/indep_vars/gdp_percapita_wide_`sector'.dta, unmatched(none)

    * HQ country code: first two characters of the firm identifier.
    generate hqctry = substr(BvD, 1, 2)

    * Firm-level patent weights; firms without weights are kept and flagged.
    mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
    generate missing_weights_1995 = (_m == 1)
    drop _m

    * Attach the GDP shares used as fallback weights to every row.
    cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta

    sort BvD year

    foreach gv in gdppcDP gdppcLP gdppcMP gdppcLG gdppcMG {

        foreach cc of global countrylist1995 {
            * A country without a series gets an all-missing placeholder.
            noisily capture confirm variable `gv'_`cc'
            if _rc {
                generate `gv'_`cc' = .
            }

            * Patent weight, replaced by the GDP share where it is missing.
            generate weight_`cc' = cond(share2_all_1995_`cc' == ., share_GDP_`cc', share2_all_1995_`cc')
            generate `gv'_wtd_`cc' = weight_`cc' * `gv'_`cc'
            generate ln`gv'_wtd_`cc' = weight_`cc' * ln(`gv'_`cc')
        }

        * Weighted totals over all countries, tagged with the sector suffix.
        egen `gv'`m'_ALL_1995_wtd = rowtotal(`gv'_wtd_??), missing
        egen ln`gv'`m'_ALL_1995_wtd = rowtotal(ln`gv'_wtd_??), missing

        * Row total of the patent shares, created only if not yet present.
        noisily capture confirm variable share2_all_1995
        if _rc {
            egen share2_all_1995 = rowtotal(share2_all_1995_??), missing
        }

        * HQ part: weighted term and weight of the HQ country itself.
        generate `gv'`m'_shr_hq_1995_wtd = .
        generate hqctry_weight = .
        foreach cc of global countrylist1995 {
            replace `gv'`m'_shr_hq_1995_wtd = `gv'_wtd_`cc' if hqctry == "`cc'"
            replace hqctry_weight = weight_`cc' if hqctry == "`cc'"
        }

        * Non-HQ part: total minus HQ term; the total itself when that
        * difference is missing; missing when the HQ weight equals one.
        generate `gv'`m'_shr_nonhq_1995_wtd = `gv'`m'_ALL_1995_wtd - `gv'`m'_shr_hq_1995_wtd
        replace `gv'`m'_shr_nonhq_1995_wtd = `gv'`m'_ALL_1995_wtd if `gv'`m'_shr_nonhq_1995_wtd == .
        replace `gv'`m'_shr_nonhq_1995_wtd = . if hqctry_weight == 1

        * Version 2: each part divided by its weight, with the patent share
        * total as fallback denominator for the non-HQ part.
        generate `gv'`m'_shr2_hq_1995_wtd = `gv'`m'_shr_hq_1995_wtd / hqctry_weight
        generate `gv'`m'_shr2_nonhq_1995_wtd = `gv'`m'_shr_nonhq_1995_wtd / (1 - hqctry_weight)
        replace `gv'`m'_shr2_nonhq_1995_wtd = `gv'`m'_shr_nonhq_1995_wtd / share2_all_1995 if `gv'`m'_shr2_nonhq_1995_wtd == .

        * Reset the per-series intermediates before the next pass.
        drop hqctry_weight
        drop *`gv'_wtd_* weight_??
    }

    * Remove the weight inputs and the raw per-country series.
    drop share*
    drop gdppc??_??

    compress
    save ${dataset_dir}/indep_vars/bvd_year_gdpcap_`f`sector''_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, replace
}



******************************
*final dataset for hq country*
******************************
* Combines the dependent variables with the HQ-country regressor files
* written by the earlier sections, builds the log (_a) variables and the
* shr3, shr4 and shr5 variants, and saves the restricted supplement file.
qui do ${code_dir}/config/country_list.do

* weight_window and wtype are taken from the globals already in memory;
* the two assignments below are left commented out.
*global weight_window "_10yr"
* global wtype "tfacit1"
* Sectors that were built above.
global sectors MANUF TOTAL


* Dependent variables, restricted to firm-years present in the list file.
use ${dataset_dir}/dep_vars/bvd_year_depvars.dta, clear
mmerge BvD year using ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, unmatched(none)


* Regressor files; firm-years without a match are kept.
mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_devgdp_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, unmatched(master)
mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_wages_vaemp_totind_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, unmatched(master)
mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_gdpcap_manuf_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, unmatched(master)
mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_wages_vaemp_manuf_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, unmatched(master)
mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_gdpcap_totind_sharesgdpweighted${weight_window}_${wtype}_hqctry.dta, unmatched(master)


* The lintr file is merged for one weight configuration only. The
* condition uses the documented logical operator for AND, a single ampersand.
if "$weight_window" == "_from1970" & "$wtype" == "tfacit1" {
    mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_lintr_manuf_sharesgdpweighted${weight_window}_${wtype}.dta, unmatched(master)
}


* Industry codes; firms without a match get the literal value NA.
mmerge BvD using ${final_dir}/BvD_industry.dta, unmatched(master)
replace industry = "NA" if _m == 1
drop _m


*lngdp_gap averages
* The lngdpgap variables are renamed only (suffix _wtd becomes _a); no log
* is taken here.
foreach vv in lngdpgap {
    ren `vv'_ALL_1995_wtd `vv'_1995_a
}

foreach vv in lngdpgap {
    foreach vr in shr_hq shr_nonhq shr2_hq shr2_nonhq {
        ren `vv'_`vr'_1995_wtd `vv'_`vr'_1995_a
    }
}
*lngdp done
**********************************************
*logs and avg for the rest of the variables, need to check which are actually present/required later
*these are just the sections that even pertain to those variables

* Naming used below: NAME_1995_a is the log of the weighted total, while
* lnNAME_1995_a is the renamed weighted total of the per-country logs.
foreach v in lsw hsw {
    foreach d in MP  {
        gen `v'`d'm_1995_a = ln(`v'`d'm_ALL_1995_wtd )
        ren ln`v'`d'm_ALL_1995_wtd ln`v'`d'm_1995_a
        gen `v'`d't_1995_a = ln(`v'`d't_ALL_1995_wtd )
        ren ln`v'`d't_ALL_1995_wtd ln`v'`d't_1995_a
    }
}

foreach d in MP {
    gen gdppc`d'm_1995_a = ln(gdppc`d'm_ALL_1995_wtd )
    gen gdppc`d't_1995_a = ln(gdppc`d't_ALL_1995_wtd )
    ren lngdppc`d'm_ALL_1995_wtd lngdppc`d'm_1995_a
    ren lngdppc`d't_ALL_1995_wtd lngdppc`d't_1995_a

    gen vaemp`d'm_1995_a = ln(vaemp`d'm_ALL_1995_wtd )
    ren lnvaemp`d'm_ALL_1995_wtd lnvaemp`d'm_1995_a
    gen vaemp`d't_1995_a = ln(vaemp`d't_ALL_1995_wtd )
    ren lnvaemp`d't_ALL_1995_wtd lnvaemp`d't_1995_a
}



*averages for the rest of the variables

* Logs of the HQ and non-HQ parts for both sector suffixes. The capture
* prefix means a failing gen is skipped silently rather than stopping the run.
foreach v in lsw hsw vaemp {
    foreach x in m t {
        foreach d in MP {
            foreach vr in shr_hq shr_nonhq shr2_hq shr2_nonhq {
                cap gen `v'`d'`x'_`vr'_1995_a = ln(`v'`d'`x'_`vr'_1995_wtd)
            }
        }
    }
}

* GDP per capita parts are logged for the m suffix only.
foreach x in m {
    foreach d in MP {
        foreach vr in shr_hq shr_nonhq shr2_hq shr2_nonhq {
            gen gdppc`d'`x'_`vr'_1995_a = ln(gdppc`d'`x'_`vr'_1995_wtd)
        }
    }
}

* shr3, shr4 and shr5 variants for every stem that has a shr_hq log, except
* lngdpgap. The stem is the part of the variable name before the first
* underscore. maxweight_1995 and shr2_all_1995 are expected to come from the
* devgdp file merged above.
* NOTE(review): lse_id is not created in this script; presumably it comes
* from bvd_year_depvars.dta - verify.
qui ds *_shr_hq_1995_a
foreach vaar in `r(varlist)' {
    local vv = substr("`vaar'",1,strpos("`vaar'","_")-1)
    if "`vv'" == "lngdpgap" {
        continue
    }
    * Ratio of the normalised part to the total, in levels, scaled by the
    * HQ weight (hq) or by one minus the HQ weight (nonhq).
    gen term_hq_var = exp(`vv'_shr2_hq_1995_a)/exp(`vv'_1995_a)*maxweight_1995
    gen term_nonhq_var = exp(`vv'_shr2_nonhq_1995_a)/exp(`vv'_1995_a)*(1-maxweight_1995)
    * Fallback scaling by the patent share total when the term is missing.
    replace term_nonhq_var = exp(`vv'_shr2_nonhq_1995_a)/exp(`vv'_1995_a)*shr2_all_1995 if term_nonhq_var == .
    * Fixed terms use the 1995 value within lse_id; mean terms use the
    * average over all rows of the lse_id.
    gen _term_hq = term_hq_var if year==1995
    gen _term_nonhq = term_nonhq_var if year==1995
    bys lse_id : egen term_hq_fixed = max(_term_hq)
    bys lse_id : egen term_nonhq_fixed = max(_term_nonhq)
    bys lse_id : egen term_hq_mean = mean(term_hq_var)
    bys lse_id : egen term_nonhq_mean = mean(term_nonhq_var)
    * shr3: row-specific term, shr4: fixed 1995 term, shr5: mean term.
    gen `vv'_shr3_hq_1995_a = `vv'_shr2_hq_1995_a*term_hq_var
    gen `vv'_shr3_nonhq_1995_a = `vv'_shr2_nonhq_1995_a*term_nonhq_var
    gen `vv'_shr4_hq_1995_a = `vv'_shr2_hq_1995_a*term_hq_fixed
    gen `vv'_shr4_nonhq_1995_a = `vv'_shr2_nonhq_1995_a*term_nonhq_fixed
    gen `vv'_shr5_hq_1995_a = `vv'_shr2_hq_1995_a*term_hq_mean
    gen `vv'_shr5_nonhq_1995_a = `vv'_shr2_nonhq_1995_a*term_nonhq_mean
    drop term_* _term_*
}



* Only the identifiers and five non-HQ regressors are kept in the output.
keep BvDIDnumber year lngdpgap_shr_nonhq_1995_a lswMPm_shr4_nonhq_1995_a hswMPm_shr4_nonhq_1995_a vaempMPm_shr4_nonhq_1995_a gdppcMPm_shr4_nonhq_1995_a
compress
save ${final_dir}/supplement_dataset${weight_window}_${wtype}_hqctry_restricted.dta, replace



}
* The brace above closes the capture noisily block opened near the top of the
* file. Its return code is nonzero when any command inside failed.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; capture keeps this quiet if the log is not open.
capture log close dat
